import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import sys
!conda install --yes --prefix {sys.prefix} plotly
Collecting package metadata (current_repodata.json): done Solving environment: done # All requested packages already installed.
Cree con numpy un tensor de 0D, 1D, 2D y uno 3D.

a = np.array(4)
a
array(4)
b = np.array([6,43,1])
b
array([ 6, 43, 1])
c = np.array([[4,68,2],[324,7,3],[124,1,29]])
c
array([[ 4, 68, 2],
[324, 7, 3],
[124, 1, 29]])
d = np.array([[[8,76]],[[5,653]]])
d
array([[[ 8, 76]],
[[ 5, 653]]])
Obtenga la forma (shape) de MNIST. Esta tabla viene con el paquete tensorflow y se obtiene como sigue:

from tensorflow.keras.datasets import mnist
from keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images.shape
(60000, 28, 28)
test_images.shape
(10000, 28, 28)
¿Cuál es el tipo de dato (dtype) de MNIST?

train_images.dtype
dtype('uint8')
test_labels.dtype
dtype('uint8')
relu() es una operación que se aplica entrada por entrada de un vector, esta devuelve el máximo entre cada entrada del vector y 0 (relu(x) = max(x,0)). Reprograme esta función en Python y después pruébela con el siguiente vector x:

x = np.array([-1, 3, -0.2, 15]).
Imprima el resultado
x = np.array([-1, 3, -0.2, 15])

def naive_relu(x):
    """Element-wise ReLU of a 1-D array: each entry becomes max(entry, 0).

    Parameters
    ----------
    x : 1-D numpy array.

    Returns
    -------
    A copy of `x` with negative entries clamped to 0.  The result is also
    printed, as the exercise asks, but it is returned as well so callers
    can reuse it (the original version discarded it).
    """
    y = x.copy()
    for i in range(x.shape[0]):
        y[i] = max(0, x[i])
    print(y)
    return y

naive_relu(x)
[ 0. 3. 0. 15.]
Python):

a = np.array([[8,4],[1,9]])
b = np.array([[2],[5]])
np.dot(a,b)
array([[36],
[47]])
a = np.array([[3,6],[5,4]])
b = np.array([[1,3],[7,2]])
np.dot(a,b)
array([[45, 21],
[33, 23]])
a = np.array([[8,5,1],[1,2,4],[7,3,2]])
b = np.array([[1,3],[4,5],[2,3]])
np.dot(a,b)
array([[30, 52],
[17, 25],
[23, 42]])
a) $f(x) = \sin(4x)$ \ $f'(x)$ = 4cos(4x)
b) $f(x) = 6x^2 + 12x$ \ $f'(x) = 12x + 12$
c) $f(x) = \sqrt{x^3}$ \ $f'(x) = \dfrac{3}{2} \sqrt{x}$ $\vee$ $\dfrac{3x^2}{2\sqrt{x^3}}$
d) $f(x) = \cos(7x + 1)$ \ $f'(x)= -7sen(7x+1)$
e) $f(x) = 3 + \ln(9x)$ \ $f'(x) = \dfrac{9}{9x}= \dfrac{1}{x}$
f) $f(x) = (x^3-1)^{100}$ \ \begin{align*} f'(x) &= 3x^2\cdot100(x^3-1)^{99} \\ &=300x^2(x^3-1)^{99} \end{align*}
g) $f(x) = \dfrac{1}{\sqrt[3]{x^2+x+1}}$ \ \ Tome $a = x^2 + x + 1$
De esta forma
$f(x) = \dfrac{1}{\sqrt[3]{a}}$
Entonces, por regla de la cadena
\begin{align*} f'(x) &= \dfrac{-1}{(\sqrt[3]{a})^2} \cdot \dfrac{1}{3\sqrt[3]{a^2}} \cdot a' \\ f'(x) &= \dfrac{-1}{a^{2/3}} \cdot \dfrac{1}{3a^{2/3}} \cdot a' \\ f'(x) &= \dfrac{-1}{3a^{4/3}} \cdot a' \\ \end{align*}Sustituyendo $a$ por la expresión original
$f'(x) = \dfrac{-1}{3 \cdot (x^2 + x + 1)^{4/3}} \cdot (2x+1)$
Tome $x_0 = 2$ y note que $f'(x) = 12x^3 - 48x^2 + 36x$
Paso 1
$f(x_0) = 3(2)^4 - 16(2)^3 +18(2)^2 = -8$
Paso 2
$f'(x_0) = f'(2) = -24$
Paso 3
Tomando $\eta = 0.05$
$x_1 = x_0 - \eta \cdot f'(x_0) = 2 - 0.05 \cdot -24 = 3.2$
$\Rightarrow f(x_1) = f(3.2)= -25.3952$
Tenemos el par $(3.2,-25.3952)$
Python:

figsize = (6,4)
dpi = 150
def f(x):
    """Quartic objective 3x^4 - 16x^3 + 18x^2 (works on scalars and arrays)."""
    quartic_term = 3 * x**4
    cubic_term = 16 * x**3
    quadratic_term = 18 * x**2
    return quartic_term - cubic_term + quadratic_term
# Plot f over [-5, 5] and mark the gradient-descent starting point x0 = 2.
fig, ax = plt.subplots(1,1, figsize = figsize, dpi = dpi)
t1 = np.arange(-5.0, 5.0, 0.1)  # evaluation grid for the curve
ax.set_xlim(-5, 5)
ax.set_ylim(-45, 45)
ax.plot(t1, f(t1))
# Lists of iterates; they start with x0 = 2, y0 = f(2).
x = [2.0]
y = [f(x[0])]
ax.plot(x,y,'ro')  # red dot at the current iterate
print("X = ", x)
print("Y = ", y)
X = [2.0] Y = [-8.0]
Derivada de f
def df(x):
    """First derivative of f(x) = 3x^4 - 16x^3 + 18x^2, i.e. 12x^3 - 48x^2 + 36x."""
    slope = 12*x**3 - 48*x**2 + 36*x
    return slope
pendiente = df(x[0])  # slope of f at the starting point x0 = 2
print("Pendiente: ", pendiente)
eta = 0.05  # learning rate
# One gradient-descent step: x1 = x0 - eta * f'(x0); record x1 and f(x1).
x.append(x[0] - eta * pendiente)
y.append(f(x[0] - eta * pendiente))
print("X = ", x)
print("Y = ", y)
# Re-plot the curve with both iterates (x0 and x1) marked in red.
fig, ax = plt.subplots(1,1, figsize = figsize, dpi = dpi)
ax.set_xlim(-1, 5)
ax.set_ylim(-45, 45)
ax.plot(t1, f(t1))
ax.plot(x,y,'ro')
Pendiente: -24.0 X = [2.0, 3.2] Y = [-8.0, -25.395200000000045]
[<matplotlib.lines.Line2D at 0x7fad61373280>]
Python.

Note que $f_x(x,y) = 2x - 2$ y $f_y(x,y) = 2y - 6$
$\nabla f = \begin{bmatrix} 2x - 2 \\ 2y -6 \end{bmatrix}$
Sea $x_0 = \begin{bmatrix} -5 \\ 5 \end{bmatrix}. $ Entonces $f(x_0) = 44 $
Tomando $\eta = 0.1$
$x_1 = x_0 - \eta \cdot f'(x_0) = \begin{bmatrix} -5 \\ 5 \end{bmatrix} - 0.1 \cdot \begin{bmatrix} -12 \\ 4 \end{bmatrix} = \begin{bmatrix} -3.8 \\ 4.6 \end{bmatrix}$
$\Rightarrow f(x_1) = f\Big(\begin{bmatrix} -3.8 \\ 4.6 \end{bmatrix}\Big)= 29.5999$
def f(x, y):
    """Bowl-shaped quadratic x^2 + y^2 - 2x - 6y + 14 (minimum value 4 at (1, 3))."""
    value = x**2 + y**2 - 2*x - 6*y + 14
    return value
# Evaluate f at the starting point (-5, 5) and store it as the vector x0.
print("f(-5,5) = ", f(-5,5),"\n")
x = -5
y = 5
x0 = np.array([x,y])
print("x, y : ",x0)
fx0 = f(x0[0],x0[1])  # f at the starting point (44)
print("fx0:",fx0)
def gradiente_f(x, y):
    """Gradient of f(x, y) = x^2 + y^2 - 2x - 6y + 14, as the array [df/dx, df/dy]."""
    dfdx = 2*x - 2
    dfdy = 2*y - 6
    return np.array([dfdx, dfdy])
print("gradiente_f: ", gradiente_f(-5,5))
f(-5,5) = 44 x, y : [-5 5] fx0: 44 gradiente_f: [-12 4]
eta = 0.1  # learning rate
# One gradient-descent step from x0 = (-5, 5): x1 = x0 - eta * grad f(x0).
x1 = x0 - eta * gradiente_f(x0[0],x0[1])
print("x, y :", x1, "\tz: ",f(x1[0],x1[1]), "\n")
x, y : [-3.8 4.6] z: 29.599999999999998
import plotly.graph_objs as go
import plotly.express as px
# Run two gradient-descent steps on the quadratic f and record each iterate.
x = -5
y = 5
eta = 0.1  # learning rate
xi = np.array([x,y])  # current iterate, starts at (-5, 5)
x = []
y = []
z = []
for i in range(2):
    # x_{i+1} = x_i - eta * grad f(x_i)
    xi = xi - eta * gradiente_f(xi[0],xi[1])
    x.append(xi[0])
    y.append(xi[1])
    z.append(f(xi[0],xi[1]))
dataF = pd.DataFrame({"X":x, "Y": y, "Z": z})
# Print only the first iterate to the console
print(dataF.head(n = 1))
# 3-D plot: red iterate markers over the surface of f
fig = px.scatter_3d(dataF, x='X', y='Y', z='Z',color_discrete_sequence=["red"])
xdata = np.arange(-10, 10, 0.1)
ydata = np.arange(-10, 10, 0.1)
X,Y = np.meshgrid(xdata,ydata)
Z = X**2 + Y**2 - 2*X - 6*Y + 14  # same f, evaluated on the whole grid
fig.add_trace(go.Surface(
    x = X,
    y = Y,
    z = Z,
    opacity = .7, showscale = False,
    colorscale='Viridis'
))
fig.show()
X Y Z 0 -3.8 4.6 29.6
Python.

Note que $f_x(x,y) = 4x^3 - 4y$ y $f_y(x,y) = 4y^3 - 4x$
$\nabla f = \begin{bmatrix} 4x^3 - 4y \\ 4y^3 - 4x \end{bmatrix}$
Sea $x_0 = \begin{bmatrix} -0.5 \\ 0.5 \end{bmatrix}. $ Entonces $f(x_0) = 2.125 $
Tomando $\eta = 0.1$
$x_1 = x_0 - \eta \cdot f'(x_0) = \begin{bmatrix} -0.5 \\ 0.5 \end{bmatrix} - 0.1 \cdot \begin{bmatrix} -2.5 \\ 2.5 \end{bmatrix} = \begin{bmatrix} -0.25 \\ 0.25 \end{bmatrix}$
$\Rightarrow f(x_1) = f\Big(\begin{bmatrix} -0.25 \\ 0.25 \end{bmatrix}\Big)= 1.2578$
(-0.25)**4 + (0.25)**4 - 4*(-0.25)*(0.25)+1
1.2578125
def f(x, y):
    """Two-variable quartic x^4 + y^4 - 4xy + 1 (has saddle/minima structure)."""
    result = x**4 + y**4 - 4*x*y + 1
    return result
# Evaluate the quartic f at the starting point (-0.5, 0.5) and store it as x0.
print("f(-0.5,0.5) = ", f(-0.5,0.5),"\n")
x = -0.5
y = 0.5
x0 = np.array([x,y])
print("x, y : ",x0)
fx0 = f(x0[0],x0[1])  # f at the starting point (2.125)
print("fx0:",fx0)
def gradiente_f(x, y):
    """Gradient of x^4 + y^4 - 4xy + 1, as the array [4x^3 - 4y, 4y^3 - 4x]."""
    partial_x = 4*(x**3) - 4*y
    partial_y = 4*(y**3) - 4*x
    return np.array([partial_x, partial_y])
print("gradiente_f: ", gradiente_f(-0.5,0.5))
f(-0.5,0.5) = 2.125 x, y : [-0.5 0.5] fx0: 2.125 gradiente_f: [-2.5 2.5]
eta = 0.1  # learning rate
# One gradient-descent step from x0 = (-0.5, 0.5).
x1 = x0 - eta * gradiente_f(x0[0],x0[1])
print("x, y :", x1, "\tz: ",f(x1[0],x1[1]), "\n")
x, y : [-0.25 0.25] z: 1.2578125
import plotly.graph_objs as go
import plotly.express as px
def f(x, y):
    """Objective for the descent demo: x^4 + y^4 - 4xy + 1."""
    return x**4 + y**4 - 4*x*y + 1

def gradiente_f(x, y):
    """Gradient of f above: [4x^3 - 4y, 4y^3 - 4x]."""
    grad_x = 4*(x**3) - 4*y
    grad_y = 4*(y**3) - 4*x
    return np.array([grad_x, grad_y])
# Run two gradient-descent steps on the quartic f and record each iterate.
x = -0.5
y = 0.5
eta = 0.1  # learning rate
xi = np.array([x,y])  # current iterate, starts at (-0.5, 0.5)
x = []
y = []
z = []
for i in range(2):
    # x_{i+1} = x_i - eta * grad f(x_i)
    xi = xi - eta * gradiente_f(xi[0],xi[1])
    x.append(xi[0])
    y.append(xi[1])
    z.append(f(xi[0],xi[1]))
dataF = pd.DataFrame({"X":x, "Y": y, "Z": z})
# Print only the first iterate to the console
print(dataF.head(n = 1))
# 3-D plot: red iterate markers over the surface of f
fig = px.scatter_3d(dataF, x='X', y='Y', z='Z',color_discrete_sequence=["red"])
xdata = np.arange(-3, 3, 0.05)
ydata = np.arange(-3, 3, 0.05)
X,Y = np.meshgrid(xdata,ydata)
Z = X**4 + Y**4 -4*X*Y + 1  # same f, evaluated on the whole grid
fig.add_trace(go.Surface(
    x = X,
    y = Y,
    z = Z,
    opacity = .7, showscale = False,
    colorscale='Viridis'
))
fig.show()
X Y Z 0 -0.25 0.25 1.257812
¿Qué pasa si en (gradient_descent) se usa un momentum (impulso) más pequeño a 0.7?

¿Qué pasa si en (gradient_descent) se usa un $\eta$ (learning rate) de $10^{-9}$?